
import requests
import re
# Page that is downloaded and scanned by this script.
PAGE_URL = 'https://www.ithome.com/0/759/777.htm'
# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Raw strings keep `\w` / `\d` as regex escapes instead of (invalid) string
# escapes, which newer Python versions warn about.
# Captures the data-vmark attribute value and the run of CJK characters that
# immediately follows the opening <p ...> tag of a list item.
_PERSON_RE = re.compile(r'</p></li><li><p data-vmark="(\w*)">([\u4e00-\u9fa5]*)')
# A field counts as a name when it starts with at least one CJK character.
_CJK_RE = re.compile(r'[\u4e00-\u9fa5]+')
# A number (optionally with a decimal part) followed by " 亿美元"
# ("hundred million US dollars").  At least one digit is required so that a
# bare " 亿美元" does not produce an empty amount and shift the pairing.
_MONEY_RE = re.compile(r'(\d+(?:\.\d+)?) 亿美元')


def fetch_html(url=PAGE_URL, timeout=REQUEST_TIMEOUT):
    """Download *url* and return its body decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page.
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    return resp.text


def find_person_fields(html):
    """Return a list of ``(vmark, text)`` tuples found in *html*.

    ``vmark`` is the value of the ``data-vmark`` attribute and ``text`` is the
    (possibly empty) run of CJK characters right after the tag.
    """
    return _PERSON_RE.findall(html)


def split_fields(person):
    """Split every field of every tuple in *person* into names and leftovers.

    A field that starts with a CJK character is treated as a name; everything
    else (attribute values, empty strings) is returned as a leftover.  Order
    of appearance is preserved in both lists.

    Returns:
        A ``(names, leftovers)`` pair of lists of strings.
    """
    names = []
    leftovers = []
    for fields in person:
        for field in fields:
            if _CJK_RE.match(field):
                names.append(field)
            else:
                leftovers.append(field)
    return names, leftovers


def extract_amounts(html):
    """Return every number in *html* that is followed by " 亿美元", as strings."""
    return _MONEY_RE.findall(html)


def pair_up(names, amounts):
    """Pair names with amounts positionally as ``[name, amount]`` lists.

    NOTE: like ``zip``, extra items of the longer input are dropped, so the
    pairing is only meaningful when both lists line up one-to-one.
    """
    return [[name, amount] for name, amount in zip(names, amounts)]


def parse_entries(html):
    """Return the ``[name, amount]`` pairs extracted from *html*."""
    names, _ = split_fields(find_person_fields(html))
    return pair_up(names, extract_amounts(html))


def main():
    """Fetch the page and print the raw text, intermediate matches and pairs."""
    html = fetch_html()
    print(html)

    person = find_person_fields(html)
    print(person)

    names, leftovers = split_fields(person)
    for item in leftovers:
        print(item)
    print(names)

    amounts = extract_amounts(html)
    print(amounts)

    for pair in pair_up(names, amounts):
        print(pair)


if __name__ == '__main__':
    main()



